Now we analyze emissions per capita#

from hashlib import sha1
import altair as alt
import pandas as pd
import numpy as np
from hashlib import sha1
# Turn off Altair's row-count limit so that large DataFrames can be passed
# directly to charts without raising a max-rows error.
alt.data_transformers.disable_max_rows()
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 2
      1 from hashlib import sha1
----> 2 import altair as alt
      3 import pandas as pd
      4 import numpy as np

ModuleNotFoundError: No module named 'altair'

Use read_csv from pandas to load the data from the data folder and assign it to a variable named gapminder_df.#

Make sure to parse any time columns using the parse_dates argument.#

# Load the Gapminder dataset; the "year" column is parsed into datetimes.
gapminder_df = pd.read_csv(
    "C:/Users/sindi/Downloads/world-data-gapminder.csv",
    parse_dates=["year"],
)

# Restrict to the 2014 rows (the string is compared against the parsed dates).
# NOTE(review): 2014 looks like the latest year with CO2 values in the sample
# output shown further down -- confirm against the full dataset.
gm_recent_co2 = gapminder_df.loc[gapminder_df["year"] == "2014"]

# The 40 rows with the highest per-capita CO2 emissions in that year.
co2_top_40 = gm_recent_co2.nlargest(n=40, columns="co2_per_capita")

Plot of the worst polluting countries in the world#

# Horizontal bar chart of the top-40 per-capita emitters, coloured by region.
co2_plot = (
    alt.Chart(co2_top_40)
    .mark_bar()
    .encode(
        alt.X("co2_per_capita", title="CO2 Emissions per capita"),
        # sort="x" orders the country axis by the value plotted on x.
        alt.Y("country", title="Country", sort="x"),
        alt.Color("region", title="Region"),
    )
    .properties(title="Worst Polluters")
)
co2_plot
# Add a co2_total column: per-capita CO2 multiplied by population, row by row.
# Rows where either input is missing end up as NaN.
gm_co2_total = gapminder_df.assign(
    co2_total=lambda df: df["co2_per_capita"] * df["population"]
)
gm_co2_total
country year population region sub_region income_group life_expectancy income children_per_woman child_mortality pop_density co2_per_capita years_in_school_men years_in_school_women co2_total
0 Afghanistan 1800-01-01 3280000 Asia Southern Asia Low 28.2 603 7.00 469.0 NaN NaN NaN NaN NaN
1 Afghanistan 1801-01-01 3280000 Asia Southern Asia Low 28.2 603 7.00 469.0 NaN NaN NaN NaN NaN
2 Afghanistan 1802-01-01 3280000 Asia Southern Asia Low 28.2 603 7.00 469.0 NaN NaN NaN NaN NaN
3 Afghanistan 1803-01-01 3280000 Asia Southern Asia Low 28.2 603 7.00 469.0 NaN NaN NaN NaN NaN
4 Afghanistan 1804-01-01 3280000 Asia Southern Asia Low 28.2 603 7.00 469.0 NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
38977 Zimbabwe 2014-01-01 15400000 Africa Sub-Saharan Africa Low 57.0 1910 3.90 64.3 39.8 0.78 10.9 10.0 12012000.0
38978 Zimbabwe 2015-01-01 15800000 Africa Sub-Saharan Africa Low 58.3 1890 3.84 59.9 40.8 NaN 11.1 10.2 NaN
38979 Zimbabwe 2016-01-01 16200000 Africa Sub-Saharan Africa Low 59.3 1860 3.76 56.4 41.7 NaN NaN NaN NaN
38980 Zimbabwe 2017-01-01 16500000 Africa Sub-Saharan Africa Low 59.8 1910 3.68 56.8 42.7 NaN NaN NaN NaN
38981 Zimbabwe 2018-01-01 16900000 Africa Sub-Saharan Africa Low 60.2 1950 3.61 55.5 43.7 NaN NaN NaN NaN

38982 rows × 15 columns

Top 40 CO2 emitters#

# From the 2014 rows, keep the 40 with the largest total CO2 emissions.
co2_total_top_40 = (
    gm_co2_total.loc[gm_co2_total["year"] == "2014"]
    .nlargest(n=40, columns="co2_total")
)
co2_total_top_40.head()
country year population region sub_region income_group life_expectancy income children_per_woman child_mortality pop_density co2_per_capita years_in_school_men years_in_school_women co2_total
7222 China 2014-01-01 1390000000 Asia Eastern Asia Upper middle 75.9 12800 1.61 11.5 148.00 7.40 10.9 9.65 1.028600e+10
37225 United States 2014-01-01 318000000 Americas Northern America High 78.9 51800 1.95 6.8 34.70 16.50 14.5 14.90 5.247000e+09
15763 India 2014-01-01 1290000000 Asia Southern Asia Lower middle 67.8 5390 2.38 47.7 435.00 1.73 8.4 5.66 2.231700e+09
28903 Russia 2014-01-01 144000000 Europe Eastern Europe Upper middle 70.8 24900 1.70 8.3 8.78 11.90 13.8 14.50 1.713600e+09
17515 Japan 2014-01-01 128000000 Asia Eastern Asia High 83.6 37300 1.43 2.8 352.00 9.47 15.0 15.40 1.212160e+09

Continent comparison#

# Horizontal bar chart of the top-40 total emitters, coloured by region.
# Channels are given by keyword; sort="x" orders countries by the x value.
co2_total_plot = alt.Chart(co2_total_top_40).mark_bar().encode(
    x=alt.X("co2_total", title="Total CO2"),
    y=alt.Y("country", title="Country", sort="x"),
    color=alt.Color("region", title="Region"),
).properties(title="Worst Polluters Total")
co2_total_plot
# Stacked area chart of summed total CO2 over time, one band per region.
total_co2_plot = (
    alt.Chart(gm_co2_total)
    .mark_area()
    .encode(
        alt.X("year", title="Year"),
        # The shorthand aggregates co2_total with a sum for each x/colour group.
        alt.Y("sum(co2_total)", title="CO2 Emissions"),
        alt.Color("region", title="Region"),
    )
    .properties(title="Total CO2 Emissions")
)
total_co2_plot

Some of the equations used in the analysis#

Here is the sample standard deviation: \begin{equation} s = \sqrt{\frac{\sum\limits_{i=1}^{n}(y_i - \bar{y})^2}{n - 1}} \end{equation}

Population regression line: \begin{equation} Y_i = \beta_0 + \beta_1 X_i + \epsilon_i \end{equation}

from IPython.display import Image
#Image(filename="jb-book2/image/polluted.png")